# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# File Name          : run_paper_simulations.R
# Programmer Name    : Luis F. Campos
#                     luiscampos@g.harvard.edu
#
# Purpose            : This file contains code to run the simulations presented
#					   in Section 5 of "Worth Weighting? How to Think
#					   About and Use Sample Weights in Survey Experiments"
#
# Input              : simulation function files
# Output             : tables and figures
#
# References         : None
#
#
# Platform           : R
# Version            : v3.3.0
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Replication Information:
#   We do expect differences when running this code on different systems 
#   (and with different seeds), but the differences all tend to be quite 
#   small and the general trends remain. If you would like to replicate 
#   the exact numbers and figures used in the article, in the readme you 
#   will find the session info. Please install and load all package 
#   versions (including the R version) to ensure the exact replication. 
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #



set.seed(4)
# Run batch of simulations

# needed libraries
library( "plyr" )
library( "xtable" )
library( "reshape2" )
library( "extrafont" )


source( "_simulation_functions.R" )
source( "_survey_exp_toolkit.R" )

TESTING = FALSE

if ( TESTING ) {
    n = 500
    B = 10
    R = 200
} else {

    n=500     # Sample size
    B=10000   # simulations
    R=200    # bootstraps
}

K=7		  # Number of strata for PS



# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Make Figure 1 in Appendix
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
popA = make.population( type=3, add.noise = TRUE )
pdf('figure1_appendix.pdf')
	pop_sample.plots(popA)
dev.off()


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Simulation A in paper
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
cat( "\n\nSimulation A: Trying to make double-Hajek fail:\n" )
pop3 = make.population( type=3 )
reps3 = run.simulation( pop3, K=K, n=n, B=B, R=R )
res3 = analyze.simulation( pop3, reps3 )

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Simulation B in paper
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
cat( "\n\nSimulation B: A constant treatment effect:\n" )
pop2 = make.population( type=2, add.noise=FALSE )
reps2 = run.simulation( pop2, K=K, n=n, B=B, R=R )
res2 = analyze.simulation( pop2, reps2 )


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Note: Table 1 in Section 4 is created from latex output of line 56 and 64 above
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sink(file = 'table1.txt'); xtable(res3); xtable(res2); sink()

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Simulation C in paper
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

output = './'
gammas = seq(0, 1, 0.05)
m = 1:length(gammas)

results_all = replicate(20, {
	# sink()
	results = list()

	for(i in 1:length(gammas)){
		popD = make.population( type=5, gamma = gammas[i], add.noise=FALSE, make.plot = FALSE)

	    reps = run.simulation( popD, K=7, n=500, B=50, R=0 )
	    res = analyze.simulation( popD, reps, SE = FALSE )

	    results[[i]] = res[c(1, 3, 4, 7, 10),]
	}

	SDs = do.call('rbind', lapply(results, function(x){ x[,'sd']}))
	bias = do.call('rbind', lapply(results, function(x){ x[,'bias']}))
	list(SDs = SDs, bias = bias)
}, simplify = FALSE)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

save(results_all, file = 'SimulationD.RData')
load(file = 'SimulationD.RData')

output = './'


gammas = seq(0, 1, 0.05)


# gave error - LC 10/18 2:19pm
# SDs_all = lapply(apply(results_all, 2, function(x) x[1]), function(x) x[[1]])
# bias_all = lapply(apply(results_all, 2, function(x) x[2]), function(x) x[[1]])

SDs_all = lapply(results_all, function(x) x[[1]])
bias_all = lapply(results_all, function(x) x[[2]])
RMSE_all = mapply(function(s, b){sqrt(s^2 + b^2)}, SDs_all, bias_all, SIMPLIFY = FALSE)

SDs_means = Reduce('+', SDs_all)/length(SDs_all)
bias_means = Reduce('+', bias_all)/length(bias_all)
RMSE_means = Reduce('+', RMSE_all)/length(RMSE_all)




# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# plotting parameters
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
ylim1 = range(sapply(SDs_all, function(SDs){range(SDs[,-c(2, 3)])}))
ylim2 = range(sapply(bias_all, function(bias){range(bias[,-c(2, 3)])}))
ylim3 = range(sapply(RMSE_all, function(rmse){range(rmse[,-c(2, 3)])})) + c(0, 0.5)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Make results plot
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
lty = c(1, 1, 2, 2, 3)
lwd = rep(0.5, 5)
greyscale = 0.7
col = rep(rgb(greyscale, greyscale, greyscale), 5)
lty2 = c(1, 1, 2, 2, 3)
lwd2 = c(2, 2, 2, 2, 2)
col2 = c('#697887', 'black', '#697887', 'black', 'black')

cex = 1.35
cex.lab = 2


pdf(file = paste('figure1.pdf', sep = ''), width = 11, height = 6)

    par(mfrow = c(1, 3), mgp=c(1.7,0,0), mar=c(3.5,3.6,2.5,0.5))

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# Bias plots
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	plot(NULL, pch = "" ,xlim = range(gammas), ylim = ylim2, xlab = expression(gamma), ylab = 'Bias', cex.lab = cex.lab, xaxt='n', yaxt='n', main = '(a) Estimate Bias', cex.main = 1.4)
	axis(side = 2,at = seq(0, 8, 2),seq(0, 8, 2),tick = FALSE, cex.axis = 1.5)
	axis(side = 1,at = seq(0, 1, 0.2),seq(0, 1, 0.2),tick = FALSE, cex.axis = 1.5, pos = -1.75)
	for(i in 1:ncol(bias_all[[1]])){
		tmp = sapply(bias_all, function(bias){
		    # lines(gammas, bias[,i], lty = lty[i], lwd = lwd[i], col = col[i])
		    points(gammas, bias[,i], col = col[i], pch = 20, cex = 0.5)
		})
	}

	for(i in 1:ncol(bias_means)){
		lines(gammas, bias_means[,i], lty = lty2[i], lwd = lwd2[i], col = col2[i])
	}
	legend('topleft', c(expression((1)~~tau[S]), expression((2)~~nu[S]), expression((3)~~hat(tau)[SATE]), expression((4)~~hat(tau)[hh]), expression((5)~~hat(tau)[ps.hh])), lty = lty2, lwd = lwd2, col = col2, ncol = 1, cex = 1.65, box.col = 0)

	text(1, bias_means[21,] + 0.2*c(1.2, -1.2, -1.7, 3, 1.2), c('(1)', '(2)', '(3)', '(4)', '(5)'), cex = cex)

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# SD plot
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	plot(NULL, pch = "" ,xlim = range(gammas), ylim = ylim1, xlab = expression(gamma), ylab = 'SE', cex.lab = cex.lab, xaxt='n', yaxt='n', main = '(b) Estimate Standard Error', cex.main = 1.4)
	axis(side = 2,at = seq(0, 4, 0.5), seq(0, 4, 0.5),tick = FALSE, cex.axis = 1.5)
	axis(side = 1,at = seq(0, 1, 0.2),seq(0, 1, 0.2),tick = FALSE, cex.axis = 1.5, pos = 0.17)

	for(i in 1:ncol(SDs_all[[1]])){
		tmp = sapply(SDs_all, function(SDs){
		    # lines(gammas, SDs[,i], lty = lty[i], lwd = lwd[i], col = col[i])
		    points(gammas, SDs[,i], col = col[i], pch = 20, cex = 0.5)
		})
	}
	for(i in 1:ncol(SDs_means)){
		lines(gammas, SDs_means[,i], lty = lty2[i], lwd = lwd2[i], col = col2[i])
	}

	# legend('topright', c(expression(tau[S]), expression(nu[S]), expression(hat(tau)[SATE]), expression(hat(tau)[hh]), expression(hat(tau)[ps.hh])), lty = lty2, lwd = lwd2, col = col2)

	# add text labels to LHS
	# text(0, SDs_means[1,] + 0.1*c(-1, 1, 1, -1, 1), c('(1)', '(2)', '(3)', '(4)', '(5)'))
	text(1, SDs_means[21,] + 0.1*c(1, 1, -1, 1, 1), c('(1)', '(2)', '(3)', '(4)', '(5)'), cex = cex)

	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	# RMSE
	# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

	plot(NULL, pch = "" ,xlim = range(gammas), ylim = ylim3, xlab = expression(gamma), ylab = 'RMSE', cex.lab = cex.lab, xaxt='n', yaxt='n', main = '(b) Estimate Root Mean Squared Error', cex.main = 1.4)
	axis(side = 2,at = seq(0, 10, 2),seq(0, 10, 2),tick = FALSE, cex.axis = 1.5)
	axis(side = 1,at = seq(0, 1, 0.2),seq(0, 1, 0.2),tick = FALSE, cex.axis = 1.5, pos = -0.01)

	for(i in 1:ncol(RMSE_all[[1]])){
		tmp = sapply(RMSE_all, function(bias){
		    # lines(gammas, bias[,i], lty = lty[i], lwd = lwd[i], col = col[i])
		    points(gammas, bias[,i], col = col[i], pch = 20, cex = 0.5)
		})
	}

	for(i in 1:ncol(RMSE_means)){
		lines(gammas, RMSE_means[,i], lty = lty2[i], lwd = lwd2[i], col = col2[i])
	}
	# legend('topleft', c(expression((1)~~tau[S]), expression((2)~~nu[S]), expression((3)~~hat(tau)[SATE]), expression((4)~~hat(tau)[hh]), expression((5)~~hat(tau)[ps.hh])), lty = lty2, lwd = lwd2, col = col2)

	text(1, RMSE_means[21,] + 0.2*c(-1.3, -1, 1, 1.2, 1.2), c('(1)', '(2)', '(3)', '(4)', '(5)'), cex = cex)


dev.off()


save(list = ls(), file = 'run_paper_simulations_output.Rda')
